# Differential expression on Kallisto data
# Preliminary samples - 2016 dataset
# Packages and dependence
packageCheckClassic <- function(x){
#
for( i in x ){
if( ! require( i , character.only = TRUE ) ){
install.packages( i , dependencies = TRUE )
require( i , character.only = TRUE )
}
}
}
packageCheckClassic(c('DESeq2','adegenet','vsn','devtools','BiocManager','ggplot2','ggrepel','markdown','pheatmap','RColorBrewer','genefilter','gplots','vegan','dplyr','limma'))
## Le chargement a nécessité le package : DESeq2
## Le chargement a nécessité le package : S4Vectors
## Warning: le package 'S4Vectors' a été compilé avec la version R 4.1.3
## Le chargement a nécessité le package : stats4
## Le chargement a nécessité le package : BiocGenerics
##
## Attachement du package : 'BiocGenerics'
## Les objets suivants sont masqués depuis 'package:stats':
##
## IQR, mad, sd, var, xtabs
## Les objets suivants sont masqués depuis 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
## Attachement du package : 'S4Vectors'
## Les objets suivants sont masqués depuis 'package:base':
##
## expand.grid, I, unname
## Le chargement a nécessité le package : IRanges
## Le chargement a nécessité le package : GenomicRanges
## Le chargement a nécessité le package : GenomeInfoDb
## Le chargement a nécessité le package : SummarizedExperiment
## Le chargement a nécessité le package : MatrixGenerics
## Le chargement a nécessité le package : matrixStats
##
## Attachement du package : 'MatrixGenerics'
## Les objets suivants sont masqués depuis 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Le chargement a nécessité le package : Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attachement du package : 'Biobase'
## L'objet suivant est masqué depuis 'package:MatrixGenerics':
##
## rowMedians
## Les objets suivants sont masqués depuis 'package:matrixStats':
##
## anyMissing, rowMedians
## Le chargement a nécessité le package : adegenet
## Le chargement a nécessité le package : ade4
##
## Attachement du package : 'ade4'
## L'objet suivant est masqué depuis 'package:GenomicRanges':
##
## score
## L'objet suivant est masqué depuis 'package:BiocGenerics':
##
## score
##
## /// adegenet 2.1.10 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
## Le chargement a nécessité le package : vsn
## Le chargement a nécessité le package : devtools
## Le chargement a nécessité le package : usethis
## Le chargement a nécessité le package : BiocManager
## Bioconductor version '3.14' is out-of-date; the current release version '3.16'
## is available with R version '4.2'; see https://bioconductor.org/install
##
## Attachement du package : 'BiocManager'
## L'objet suivant est masqué depuis 'package:devtools':
##
## install
## Le chargement a nécessité le package : ggplot2
## Le chargement a nécessité le package : ggrepel
## Le chargement a nécessité le package : markdown
## Le chargement a nécessité le package : pheatmap
## Le chargement a nécessité le package : RColorBrewer
## Le chargement a nécessité le package : genefilter
##
## Attachement du package : 'genefilter'
## Les objets suivants sont masqués depuis 'package:MatrixGenerics':
##
## rowSds, rowVars
## Les objets suivants sont masqués depuis 'package:matrixStats':
##
## rowSds, rowVars
## Le chargement a nécessité le package : gplots
##
## Attachement du package : 'gplots'
## L'objet suivant est masqué depuis 'package:IRanges':
##
## space
## L'objet suivant est masqué depuis 'package:S4Vectors':
##
## space
## L'objet suivant est masqué depuis 'package:stats':
##
## lowess
## Le chargement a nécessité le package : vegan
## Le chargement a nécessité le package : permute
##
## Attachement du package : 'permute'
## L'objet suivant est masqué depuis 'package:devtools':
##
## check
## Le chargement a nécessité le package : lattice
## This is vegan 2.6-4
## Le chargement a nécessité le package : dplyr
##
## Attachement du package : 'dplyr'
## L'objet suivant est masqué depuis 'package:Biobase':
##
## combine
## L'objet suivant est masqué depuis 'package:matrixStats':
##
## count
## Les objets suivants sont masqués depuis 'package:GenomicRanges':
##
## intersect, setdiff, union
## L'objet suivant est masqué depuis 'package:GenomeInfoDb':
##
## intersect
## Les objets suivants sont masqués depuis 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## Les objets suivants sont masqués depuis 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## Les objets suivants sont masqués depuis 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## Les objets suivants sont masqués depuis 'package:stats':
##
## filter, lag
## Les objets suivants sont masqués depuis 'package:base':
##
## intersect, setdiff, setequal, union
## Le chargement a nécessité le package : limma
## Warning: le package 'limma' a été compilé avec la version R 4.1.3
##
## Attachement du package : 'limma'
## L'objet suivant est masqué depuis 'package:DESeq2':
##
## plotMA
## L'objet suivant est masqué depuis 'package:BiocGenerics':
##
## plotMA
#BiocManager::install('tximport', force = TRUE)
#BiocManager::install('apeglm')
#BiocManager::install('ashr')
#BiocManager::install("EnhancedVolcano")
#BiocManager::install("arrayQualityMetrics")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("yanlinlin82/ggvenn")
## Skipping install of 'ggvenn' from a github remote, the SHA1 (25fd3b55) has not changed since last install.
## Use `force = TRUE` to force installation
library("adegenet")
library('ggvenn')
## Le chargement a nécessité le package : grid
library('tximport')
library('apeglm')
library('ashr')
library('EnhancedVolcano')
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library('BiocManager')
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## ℹ SHA-1 hash of file is "015fc0457e61e3e93a903e69a24d96d2dac7b9fb"
# Working environment and data loading
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
#candidateGenes<-read.csv('candidateGenes.csv',header=T,sep=',')
samplesSaccharina<-read.table('saccharinaDesignSimple.txt',header=T)
samplesHedophylum<-read.table('hedophylumDesignSimple.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/kelpProject/kallistoOutput'
outputPath<-'/Users/mmeynadier/Documents/kelpProject/DESeq2Output'
#setwd(dataPath)
# DDS object
# If data from kallisto
tx2geneSaccharina<-read.table('Saccharina_tx2gene',header=T)
tx2geneHedophylum<-read.table('Hedophylum_tx2gene',header=T)
#
# # Data importation - txImport
setwd(dataPath)
filesSaccharina<-paste0(samplesSaccharina$sample)
txiSaccharina<-tximport(files = filesSaccharina,type='kallisto',tx2gene = tx2geneSaccharina)
## Note: importing `abundance.h5` is typically faster than `abundance.tsv`
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8
## transcripts missing from tx2gene: 8998
## summarizing abundance
## summarizing counts
## summarizing length
filesHedophylum<-paste0(samplesHedophylum$sample)
txiHedophylum<-tximport(files = filesHedophylum,type='kallisto',tx2gene = tx2geneHedophylum)
## Note: importing `abundance.h5` is typically faster than `abundance.tsv`
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8 9
## transcripts missing from tx2gene: 9442
## summarizing abundance
## summarizing counts
## summarizing length
ddsSaccharina<-DESeqDataSetFromTximport(txiSaccharina,colData=samplesSaccharina,design= ~treatment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
ddsHedophylum<-DESeqDataSetFromTximport(txiHedophylum,colData=samplesHedophylum,design= ~treatment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
# pre-filtering
keepSaccharina <- rowSums(counts(ddsSaccharina)) >= 10
ddsSaccharina <- ddsSaccharina[keepSaccharina,]
keepHedophylum <- rowSums(counts(ddsHedophylum)) >= 10
ddsHedophylum <- ddsHedophylum[keepHedophylum,]
# Differential expression analysis
ddsSaccharina<-DESeq(ddsSaccharina)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsSaccharina))
## [,1]
## [1,] "Intercept"
## [2,] "treatment_T1_vs_C"
## [3,] "treatment_T2_vs_C"
## [4,] "treatment_T3_vs_C"
ddsHedophylum<-DESeq(ddsHedophylum)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsHedophylum))
## [,1]
## [1,] "Intercept"
## [2,] "treatment_T1_vs_C"
## [3,] "treatment_T2_vs_C"
## [4,] "treatment_T3_vs_C"
# Exploring the results - Saccharina
S_C_vs_T1 <- results(ddsSaccharina,contrast=c("treatment", "C", "T1"))
S_C_vs_T2 <- results(ddsSaccharina,contrast=c("treatment", "C", "T2"))
S_C_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "C", "T3"))
S_T1_vs_T2 <- results(ddsSaccharina,contrast=c("treatment", "T1", "T2"))
S_T1_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "T1", "T3"))
S_T2_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "T2", "T3"))
DESeq2::plotMA(S_C_vs_T1,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T1")

DESeq2::plotMA(S_C_vs_T2,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T2")

DESeq2::plotMA(S_C_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T3")

DESeq2::plotMA(S_T1_vs_T2,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T2")

DESeq2::plotMA(S_T1_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T3")

DESeq2::plotMA(S_T2_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT2 vs T3")

vsdSaccharina <- vst(ddsSaccharina, blind=T)
meanSdPlot(assay(vsdSaccharina))

ntd <- normTransform(ddsSaccharina)
meanSdPlot(assay(ntd))

select <- order(rowMeans(counts(ddsSaccharina,normalized=TRUE)),
decreasing=TRUE)[1:20]
df <- as.data.frame(colData(ddsSaccharina)[,"treatment"])
pheatmap(assay(vsdSaccharina)[select,], cluster_rows=FALSE, show_rownames=F,
cluster_cols=FALSE, annotation_col=df)

pcaData <- plotPCA(vsdSaccharina, intgroup="treatment", returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=treatment)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()

sampleDists <- dist(t(assay(vsdSaccharina)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <-vsdSaccharina$treatment
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors)

# Hedophylum
H_C_vs_T1 <- results(ddsHedophylum,contrast=c("treatment", "C", "T1"))
H_C_vs_T2 <- results(ddsHedophylum,contrast=c("treatment", "C", "T2"))
H_C_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "C", "T3"))
H_T1_vs_T2 <- results(ddsHedophylum,contrast=c("treatment", "T1", "T2"))
H_T1_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "T1", "T3"))
H_T2_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "T2", "T3"))
DESeq2::plotMA(H_C_vs_T1,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T1")

DESeq2::plotMA(H_C_vs_T2,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T2")

DESeq2::plotMA(H_C_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T3")

DESeq2::plotMA(H_T1_vs_T2,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T2")

DESeq2::plotMA(H_T1_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T3")

DESeq2::plotMA(H_T2_vs_T3,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT2 vs T3")

vsdHedophylum <- vst(ddsHedophylum, blind=T)
meanSdPlot(assay(vsdHedophylum))

ntd <- normTransform(ddsHedophylum)
meanSdPlot(assay(ntd))

select <- order(rowMeans(counts(ddsHedophylum,normalized=TRUE)),
decreasing=TRUE)[1:20]
df <- as.data.frame(colData(ddsHedophylum)[,"treatment"])
pheatmap(assay(vsdHedophylum)[select,], cluster_rows=FALSE, show_rownames=F,
cluster_cols=FALSE, annotation_col=df)

pcaData <- plotPCA(vsdHedophylum, intgroup="treatment", returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=treatment)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed()

sampleDists <- dist(t(assay(vsdHedophylum)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- vsdHedophylum$treatment
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors)

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] fr_FR.UTF-8/fr_FR.UTF-8/fr_FR.UTF-8/C/fr_FR.UTF-8/fr_FR.UTF-8
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] EnhancedVolcano_1.12.0 ashr_2.2-54
## [3] apeglm_1.16.0 tximport_1.22.0
## [5] ggvenn_0.1.10 limma_3.50.3
## [7] dplyr_1.1.1 vegan_2.6-4
## [9] lattice_0.20-45 permute_0.9-7
## [11] gplots_3.1.3 genefilter_1.76.0
## [13] RColorBrewer_1.1-3 pheatmap_1.0.12
## [15] markdown_1.5 ggrepel_0.9.3
## [17] ggplot2_3.4.2 BiocManager_1.30.20
## [19] devtools_2.4.5 usethis_2.1.6
## [21] vsn_3.62.0 adegenet_2.1.10
## [23] ade4_1.7-22 DESeq2_1.34.0
## [25] SummarizedExperiment_1.24.0 Biobase_2.54.0
## [27] MatrixGenerics_1.6.0 matrixStats_0.63.0
## [29] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1
## [31] IRanges_2.28.0 S4Vectors_0.32.4
## [33] BiocGenerics_0.40.0
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.8 igraph_1.4.1 splines_4.1.2
## [4] BiocParallel_1.28.3 digest_0.6.31 invgamma_1.1
## [7] htmltools_0.5.5 SQUAREM_2021.1 fansi_1.0.4
## [10] magrittr_2.0.3 memoise_2.0.1 cluster_2.1.4
## [13] tzdb_0.3.0 remotes_2.4.2 readr_2.1.4
## [16] Biostrings_2.62.0 annotate_1.72.0 extrafont_0.19
## [19] vroom_1.6.1 extrafontdb_1.0 bdsmatrix_1.3-6
## [22] prettyunits_1.1.1 colorspace_2.1-0 blob_1.2.4
## [25] xfun_0.38 hexbin_1.28.3 callr_3.7.3
## [28] crayon_1.5.2 RCurl_1.98-1.10 jsonlite_1.8.4
## [31] survival_3.5-5 ape_5.7-1 glue_1.6.2
## [34] gtable_0.3.3 zlibbioc_1.40.0 XVector_0.34.0
## [37] seqinr_4.2-23 proj4_1.0-12 DelayedArray_0.20.0
## [40] pkgbuild_1.4.0 Rttf2pt1_1.3.12 maps_3.4.1
## [43] scales_1.2.1 mvtnorm_1.1-3 DBI_1.1.3
## [46] miniUI_0.1.1.1 Rcpp_1.0.10 xtable_1.8-4
## [49] emdbook_1.3.12 bit_4.0.5 preprocessCore_1.56.0
## [52] truncnorm_1.0-9 profvis_0.3.7 htmlwidgets_1.6.2
## [55] httr_1.4.5 ellipsis_0.3.2 farver_2.1.1
## [58] urlchecker_1.0.1 pkgconfig_2.0.3 XML_3.99-0.14
## [61] sass_0.4.5 locfit_1.5-9.7 utf8_1.2.3
## [64] labeling_0.4.2 tidyselect_1.2.0 rlang_1.1.0
## [67] reshape2_1.4.4 later_1.3.0 AnnotationDbi_1.56.2
## [70] munsell_0.5.0 tools_4.1.2 cachem_1.0.7
## [73] cli_3.6.1 generics_0.1.3 RSQLite_2.3.0
## [76] evaluate_0.20 stringr_1.5.0 fastmap_1.1.1
## [79] yaml_2.3.7 processx_3.8.0 knitr_1.42
## [82] bit64_4.0.5 fs_1.6.1 caTools_1.18.2
## [85] purrr_1.0.1 KEGGREST_1.34.0 nlme_3.1-162
## [88] mime_0.12 ash_1.0-15 ggrastr_1.0.1
## [91] compiler_4.1.2 rstudioapi_0.14 beeswarm_0.4.0
## [94] curl_5.0.0 png_0.1-8 affyio_1.64.0
## [97] tibble_3.2.1 geneplotter_1.72.0 bslib_0.4.2
## [100] stringi_1.7.12 ps_1.7.3 ggalt_0.4.0
## [103] Matrix_1.5-1 vctrs_0.6.1 pillar_1.9.0
## [106] lifecycle_1.0.3 jquerylib_0.1.4 irlba_2.3.5.1
## [109] bitops_1.0-7 httpuv_1.6.9 R6_2.5.1
## [112] affy_1.72.0 promises_1.2.0.1 KernSmooth_2.23-20
## [115] vipor_0.4.5 sessioninfo_1.2.2 MASS_7.3-58.3
## [118] gtools_3.9.4 pkgload_1.3.2 withr_2.5.0
## [121] GenomeInfoDbData_1.2.7 hms_1.1.3 mgcv_1.8-42
## [124] parallel_4.1.2 coda_0.19-4 rmarkdown_2.21
## [127] mixsqp_0.3-48 bbmle_1.0.25 numDeriv_2016.8-1.1
## [130] shiny_1.7.4 ggbeeswarm_0.7.1